#!/usr/bin/python3

import io
import os
import sys

# Copyright holder and licenses that together form the "default" case:
# write_output() does not emit a stanza for files whose only author is
# default_copyright and whose license is one of default_licenses.
default_copyright = 'The Qt Company Ltd.'
default_licenses = (
    'BSD-3-clause',
    'LGPL-3 or GPL-2',
    'GPL-3 with Qt-1.0 exception',
    'GFDL-NIV-1.3',
)

# Maps the tag that follows '$QT_BEGIN_LICENSE:' in a source header to the
# license name written to the output (looked up in parse_file()).
licenses_map = {
    'BSD': 'BSD-3-clause',
    'FDL': 'GFDL-NIV-1.3',
    'GPL-EXCEPT': 'GPL-3 with Qt-1.0 exception',
    'LGPL': 'LGPL-3 or GPL-2',
    'LGPL21': 'LGPL-2.1-or-3 with Qt-1.1 exception',
    'LGPL3': 'LGPL-3 or GPL-2+',
    'LGPL3-COMM': 'LGPL-3',
}

# Files whose path (relative to the scanned root) starts with one of these
# prefixes are skipped by get_source_files().
exclude_prefixes = (
    'header',
    '.git',
)

# Marker lines in debian/copyright; main() replaces the text between them.
start_header = '## BEGIN AUTO GENERATED BLOCK'
end_header = '## END AUTO GENERATED BLOCK'


class CopyrightInfo:
    """Collects a list of files plus the year range seen for each author."""

    def __init__(self):
        # author name -> earliest / latest year recorded so far
        self.min_years = {}
        self.max_years = {}
        # files passed to add_file(), in call order
        self.files = []

    def add_file(self, authors, file):
        """Record *file* and widen the stored year range of every author.

        *authors* is an iterable of (min_year, max_year, author) tuples.
        """
        for first, last, name in authors:
            # Falling back to the new value makes an unseen author start
            # out with exactly the range of this entry.
            self.min_years[name] = min(self.min_years.get(name, first), first)
            self.max_years[name] = max(self.max_years.get(name, last), last)
        self.files.append(file)

    def get_strings(self, authors):
        """Yield '<year> <author>' or '<first>-<last> <author>' per author.

        Every name in *authors* must have been recorded by add_file();
        an unknown name raises KeyError while iterating.
        """
        for name in authors:
            first = self.min_years[name]
            last = self.max_years[name]
            if first != last:
                yield '%d-%d %s' % (first, last, name)
            else:
                yield '%d %s' % (first, name)


def canonicalize_author_name(author):
    """Return the canonical spelling for a known author, else *author*.

    An entry matches when any of its substrings occurs in *author*;
    entries are tried in order and the first match wins.
    """
    aliases = (
        (('KDAB',),
         'Klarälvdalens Datakonsult AB, a KDAB Group company'),
        (('BlackBerry', 'Research In Motion', 'Research in Motion'),
         'BlackBerry Limited (formerly Research In Motion)'),
        (('Ivan Komissarov',),
         'Ivan Komissarov <ABBAPOH@gmail.com>'),
        (('Martsum - tmartsum[at]gmail.com',),
         'Thorbjørn Lund Martsum <tmartsum@gmail.com>'),
        (('Olivier Goffart',),
         'Olivier Goffart <ogoffart@woboq.com>'),
        (('Robin Burchell',),
         'Robin Burchell <robin.burchell@viroteck.net>'),
    )
    for needles, canonical in aliases:
        if any(needle in author for needle in needles):
            return canonical
    return author


def parse_file(filename):
    """Extract the license and the copyright holders from a file header.

    Only the leading lines of the file are inspected (``readlines(500)``
    stops once roughly 500 characters have been read).  The file name is
    echoed to stderr.

    Returns:
        A ``(license, authors)`` pair.  ``license`` is the ``licenses_map``
        entry for the file's ``$QT_BEGIN_LICENSE:`` tag, or None when no
        such tag was seen.  ``authors`` is a list of
        ``(min_year, max_year, author)`` tuples, one per copyright line.
        ``(None, None)`` is returned when the file is not valid UTF-8.

    Raises:
        KeyError: the license tag is not present in ``licenses_map``.
        ValueError, IndexError: a copyright line does not carry a
            4-digit year where one is expected.
    """
    print(filename, file=sys.stderr)
    found_license = None
    authors = []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of whoever runs the script; undecodable files are skipped.
    with open(filename, encoding='utf-8') as source:
        try:
            head = source.readlines(500)
        except UnicodeDecodeError:
            return None, None
    for line in head:
        # Header lines look like '** Copyright (C) 2016 Some Holder'; a
        # leading space marks a line that is not a top-level notice.
        if line[3:16] == 'Copyright (C)' and line[0] != ' ':
            # Strip only the newline, so an unterminated last line does
            # not lose its final character.
            notice = line[17:].rstrip('\n')
            max_year = min_year = int(notice[:4])
            if notice[4] == '-':
                # 'YYYY-YYYY holder'
                max_year = int(notice[5:9])
                author = notice[10:]
            elif notice[4:7] == ' - ':
                # 'YYYY - YYYY holder'
                max_year = int(notice[7:11])
                author = notice[12:]
            else:
                # 'YYYY holder'
                author = notice[5:]
            author = canonicalize_author_name(author)
            authors.append((min_year, max_year, author))
        if line[3:21] == '$QT_BEGIN_LICENSE:':
            # The tag is terminated by a '$' before the end of the line.
            tag = line[21:].rstrip('\n').rstrip('$')
            found_license = licenses_map[tag]
    return found_license, authors


def get_source_files(root_directory):
    """Yield the path of every non-excluded file below *root_directory*.

    A leading './' is removed from each path before it is compared with
    ``exclude_prefixes`` and before it is yielded.
    """
    for folder, _subdirs, names in os.walk(root_directory):
        for name in names:
            candidate = os.path.join(folder, name)
            if candidate[:2] == './':
                candidate = candidate[2:]
            # str.startswith accepts a tuple and matches any of its items.
            if not candidate.startswith(tuple(exclude_prefixes)):
                yield candidate


def format_list(title, strings):
    """Return *title* followed by *strings*, one per line.

    Continuation lines are indented by the width of *title* so that all
    items line up under the first one.
    """
    continuation = '\n' + len(title) * ' '
    body = continuation.join(strings)
    return title + body


def main(root_directory):
    """Regenerate the auto-generated block of debian/copyright.

    The text up to and including the ``start_header`` line and the text
    from the ``end_header`` line onwards are kept; everything in between
    is replaced by what write_output() produces for *root_directory*.

    Raises:
        ValueError: if ``start_header`` is missing, or ``end_header`` does
            not occur after it.  The file is left untouched in that case.
    """
    with open('debian/copyright', encoding='utf-8') as copyright_file:
        current_copyright = copyright_file.read()

    # str.find() returns -1 for a missing marker; slicing with that value
    # would silently write a mangled file, so refuse to continue instead.
    begin = current_copyright.find(start_header)
    if begin < 0:
        raise ValueError(
            'debian/copyright does not contain %r' % start_header)
    finish = current_copyright.find(end_header, begin + len(start_header))
    if finish < 0:
        raise ValueError(
            'debian/copyright does not contain %r after %r'
            % (end_header, start_header))

    # Keep the begin marker together with the newline that follows it ...
    start_data = current_copyright[:begin + len(start_header) + 1]
    # ... and the end marker together with the newline that precedes it.
    end_data = current_copyright[finish - 1:]

    # Render into memory first: opening the file with 'w' truncates it, so
    # an exception while scanning the tree must not happen after that.
    generated = io.StringIO()
    write_output(root_directory, generated)

    with open('debian/copyright', 'w', encoding='utf-8') as output_file:
        output_file.write(start_data)
        output_file.write(generated.getvalue())
        output_file.write(end_data)

def write_output(root_directory, output_file):
    """Write one copyright stanza per (license, set of authors) group.

    Every file returned by get_source_files(root_directory) is parsed;
    files without a recognized license are ignored.  The remaining files
    are grouped by license and by their set of authors, and for each group
    a ``Files:`` / ``Copyright:`` / ``License:`` stanza is written to
    *output_file* (any object with a ``write`` method).  Groups whose only
    author is ``default_copyright`` under one of ``default_licenses`` are
    not written.
    """
    # license name -> {sorted tuple of author names -> CopyrightInfo}
    data = {}

    for filename in get_source_files(root_directory):
        file_license, authors = parse_file(filename)
        if file_license is None:
            continue
        # Several copyright lines of one file can canonicalize to the same
        # holder; de-duplicate so such a file joins the holder's regular
        # group and the holder is listed only once.
        authors_tuple = tuple(sorted({entry[2] for entry in authors}))
        license_dict = data.setdefault(file_license, {})
        if authors_tuple not in license_dict:
            license_dict[authors_tuple] = CopyrightInfo()
        license_dict[authors_tuple].add_file(authors, filename)

    for file_license in sorted(data):
        output_file.write('\n## ' + file_license + '\n')
        license_dict = data[file_license]
        for authors in sorted(license_dict):
            if authors == (default_copyright,) and file_license in default_licenses:
                continue
            copyright_info = license_dict[authors]
            output_file.write('\n')
            output_file.write(format_list('Files: ', sorted(copyright_info.files)) + '\n')
            output_file.write(format_list('Copyright: ', copyright_info.get_strings(authors)) + '\n')
            output_file.write('License: ' + file_license + '\n')


# When run as a script, scan the current working directory; main() reads and
# rewrites debian/copyright relative to that same directory.
if __name__ == '__main__':
    main('.')
